#!/usr/bin/env python3
'''
Clean log file output from the fuzztest test harness.  This tool collapses
duplicate outputs into a single copy preceded by all the fuzz lines that
triggered it. To allow useful comparisons of fuzztest logs it imposes
a canonical ordering on the entries.
'''
import argparse
import collections
import errno
import operator
import re
import sys
from itertools import chain, groupby, zip_longest, repeat

# Module metadata. __version__ is the string printed by the --version
# command line option.
__version__ = "1.0"
__date__ = "15 September 2012"
__author__ = "Tim Eves <tim_eves@sil.org>"
__license__ = '''
SPDX-License-Identifier: MIT OR MPL-2.0 OR LGPL-2.1-or-later OR GPL-2.0-or-later
Copyright 2012, SIL International, All rights reserved.
'''

# A fuzz record line. Groups, in order: an optional signed decimal number,
# a 0x-prefixed hexadecimal number, a hexadecimal or decimal number and,
# after an optional further comma, whatever remains of the line. Commas
# between the first three fields may be surrounded by whitespace.
recm = re.compile(r'^(-?\d+)?\s*,\s*(0[xX][\da-fA-F]+)\s*,\s*'
                  r'(0[xX][\da-fA-F]+|\d+)\s*,?(.*)$')
# A "==<digits>==" marker at the start of any line (MULTILINE), followed by
# whitespace with an optional "at"/"by" keyword (group 1) and an optional
# hexadecimal address ending in a colon. Substituting group 1 for the match
# drops the marker and the address but keeps the spacing and keyword.
# NOTE(review): the format looks like valgrind output -- confirm.
valgm = re.compile(r'^==\d+==(\s+(?:at|by)?\s*)(?:0[xX][\da-fA-F]+:)?',
                   re.MULTILINE)


class fuzz(collections.namedtuple('fuzz', 'ret position value comment')):
    '''
    A single fuzz record with the fields ret, position, value and comment.

    str() renders the record as a canonical log line: the ret field (left
    empty when it is 0 or None), the position as "0X" followed by zero
    padded upper case hex (10 characters in total), the value as a decimal
    right aligned in 3 characters and, only when the comment is non-empty,
    a comma followed by the comment.
    '''
    # Keep instances as bare tuples; without this every record would also
    # carry its own instance __dict__.
    __slots__ = ()

    def __str__(self):
        comment = self.comment or ''
        return "{0},{1.position:#010X},{1.value: >3d}{2!s}{3!s}".format(
            self.ret or '',
            self,
            ',' if comment else '',  # separator only when a comment follows
            comment)


class fuzz_log(collections.defaultdict):
    '''
    Group the fuzz records of a log by the output text that followed them.

    Keys are the text found directly after a run of record lines, with the
    parts matched by valgm removed and leading whitespace stripped, or None
    for records that had no text directly after them. Values are lists of
    fuzz records sorted by position. Within a run of consecutive record
    lines only the last record is credited with the text after the run.
    '''

    @staticmethod
    def __is_rec(s, rm=recm):
        '''Return True if the line s is a fuzz record line.'''
        return rm.match(s) is not None

    @staticmethod
    def __number(text):
        '''
        Convert a numeric field captured by recm into an int.

        The base is chosen explicitly rather than with int(text, 0), which
        raises ValueError for zero padded decimals such as "007" even though
        recm accepts them.
        '''
        if text[:2] in ('0x', '0X'):
            return int(text, 16)
        return int(text, 10)

    @staticmethod
    def __record(line):
        '''Parse one record line into a fuzz tuple.'''
        # A missing leading field is read as '0'; the other groups always
        # take part in a successful match.
        ret, position, value, comment = recm.match(line).groups('0')
        number = fuzz_log.__number
        return fuzz(number(ret), number(position), number(value), comment)

    def __init__(self, fileobj):
        '''
        Read every line from fileobj (any iterable of lines) and group the
        records it contains.

        Text that appears before the first record line has no record to be
        attached to and is ignored.
        '''
        super(fuzz_log, self).__init__(list)
        # groupby yields alternating runs of record lines and other text.
        pending = None  # last record of the previous run, awaiting its text
        for is_rec, lines in groupby(fileobj, self.__is_rec):
            if is_rec:
                recs = [self.__record(line) for line in lines]
                # Only touch self[None] when there is something to add so
                # that no empty group is ever created.
                for rec in recs[:-1]:
                    self[None].append(rec)
                pending = recs[-1]
            elif pending is not None:
                text = valgm.sub(r'\1', ''.join(lines)).lstrip()
                self[text].append(pending)
                pending = None
            # else: text ahead of the first record; pairing it with a record
            # would shift every later record/output pair by one.
        if pending is not None:
            # The log ended on a record line with no output after it.
            self[None].append(pending)
        for recs in self.values():
            recs.sort(key=operator.attrgetter('position'))


def record_sort_key(r):
    '''
    Sort key for an (output, records) pair: the position of the first
    record in its record list.
    '''
    records = r[1]
    first = records[0]
    return first.position


# Command line interface. Both positional arguments are optional and fall
# back to stdin and stdout respectively.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    'log',
    nargs='?',
    type=argparse.FileType('rt'),
    default=sys.stdin,
    help='A log file generated by fuzztest',
)
parser.add_argument(
    'out',
    nargs='?',
    type=argparse.FileType('wt'),
    default=sys.stdout,
    help='The file to send the processed log to',
)
parser.add_argument(
    '--version',
    action='version',
    version=__version__,
)

if __name__ == '__main__':
    import os

    args = parser.parse_args()
    try:
        # Emit the groups ordered by the position of their first record:
        # the record lines first, then the output they share (if any).
        for err, rs in sorted(fuzz_log(args.log).items(),
                              key=record_sort_key):
            args.out.writelines(['\n'.join(map(str, rs)), '\n'])
            if err:
                args.out.write(err)
            args.out.flush()
    except IOError as exc:
        if exc.errno == errno.EPIPE:
            # The reader closed the pipe, which is not treated as an error.
            # The stream still holds the data it failed to write and is
            # flushed again at interpreter exit, so point its descriptor at
            # devnull to stop that flush raising a second BrokenPipeError.
            os.dup2(os.open(os.devnull, os.O_WRONLY), args.out.fileno())
        else:
            sys.stderr.write("{0}: {1!s}\n".format(parser.prog, exc))
            sys.exit(1)
